# -*- coding:utf8 -*-
from filter_words import FilterWord


def start_freq():
    """
    Run the word-frequency statistics script.

    Asks ``FilterWord`` for the total record count, then walks the records
    in fixed-size batches.  Each record is converted with
    ``change_info_word_2_freq_dict`` and the result is handed to
    ``save_to_mongodb``.  A progress line is printed every 50 batches.

    :return: None
    """
    f = FilterWord()
    counts = f.get_info_word_counts()
    # Feed 10 records per batch.
    batch_size = 10
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("总共有%d条数据准备插入..." % counts)
    # Number of batches needed: ceiling division, so an exact multiple of
    # batch_size (or zero records) does not trigger an extra empty query,
    # and `//` keeps the result an int regardless of interpreter version.
    times = (counts + batch_size - 1) // batch_size
    for i in range(times):
        result = f.get_info_word_msg(from_num=i * batch_size, batch_size=batch_size)
        for item in result:
            dict_msg = f.change_info_word_2_freq_dict(item)
            f.save_to_mongodb(dict_msg)
        # Report progress once every 50 batches (including the first).
        if i % 50 == 0:
            print("当前进度 %d / %d" % (i * batch_size, counts))


# Script entry point: run the job only when executed directly, not on import.
if "__main__" == __name__:
    start_freq()